CMPU4091 Visualizing Data

Code
# Set the CRAN repository used for package installation in this session.
options(list(repos = c(CRAN = "https://cloud.r-project.org")))

# Attach a package without console noise, installing it first when it is
# not already available.
#
# pkg: name of the package, given as a character string.
#
# Messages and warnings raised while checking, installing and attaching the
# package are all suppressed.
quiet_library <- function(pkg) {
  suppressMessages(
    suppressWarnings({
      already_installed <- requireNamespace(pkg, quietly = TRUE)
      if (!already_installed) {
        # Install without progress output
        install.packages(pkg, quiet = TRUE)
      }
      # `pkg` holds the name as a string, hence character.only = TRUE
      library(pkg, character.only = TRUE)
    })
  )
}

# Install (when missing) and attach every package used below, silently.
invisible(lapply(
  c(
    "ggplot2",
    "plotly",
    # gapminder: an excerpt of the data available at Gapminder.org. For each
    # of 142 countries, the package provides values for life expectancy,
    # GDP per capita, and population, every five years, from 1952 to 2007.
    "gapminder",
    "tidyverse"
  ),
  quiet_library
))
Code
# Path to the salary data
datapath <- file.path("creative-salaries.csv")

# Load it into a data frame
creatives <- read.csv(datapath)

# Give the two median-earnings columns short names and add the 2016 -> 2017
# year-over-year change in percent, rounded to 2 decimals.
# The arithmetic is vectorised, so no rowwise() grouping is needed; leaving
# one attached would make every later mutate() on `creatives` run row by row.
creatives <- creatives %>%
  rename(
    Median_2016 = X2016.Median.hourly.earnings,
    Median_2017 = X2017.Median.hourly.earnings
  ) %>%
  mutate(
    Percent_Improvement = round(
      (Median_2017 - Median_2016) / Median_2016 * 100,
      2
    )
  )


# The line graph further down needs median salaries for 2016 to 2024, so
# values for 2018 to 2024 are estimated before it is drawn.

# Year-over-year percentage increases, named by the year they apply to.
percentage_increases <- setNames(
  c(
    2.80,
    2.45,
    3.19,
    4.17,
    9.44,
    5.06,
    4.50  # Assuming a 4.5% increase for 2024
  ),
  as.character(2018:2024)
)

# Project median earnings from a base year to a target year by compounding
# the year-over-year percentage increases.
#
# Args:
#   base_year:     Year that `base_earnings` refers to.
#   base_earnings: Earnings in `base_year`; a vector is compounded
#                  elementwise.
#   target_year:   Year to project to; must not be earlier than `base_year`.
#   increases:     Named numeric vector of percentage increases, named by
#                  year. Defaults to the script-level `percentage_increases`.
#
# Returns:
#   The projected earnings rounded to 2 decimals. When `target_year` equals
#   `base_year` no increase is applied and the rounded input is returned.
#
# Errors:
#   Stops when `target_year` precedes `base_year`, or when a year in the
#   range has no entry in `increases` (previously both cases returned NA).
calculate_earnings <- function(base_year, base_earnings, target_year,
                               increases = percentage_increases) {
  if (target_year < base_year) {
    stop("target_year must not be earlier than base_year", call. = FALSE)
  }
  if (target_year == base_year) {
    # seq(base_year + 1, target_year) would count downwards here
    return(round(base_earnings, 2))
  }

  years <- as.character(seq(base_year + 1, target_year))
  missing_years <- setdiff(years, names(increases))
  if (length(missing_years) > 0) {
    stop(
      "No percentage increase defined for: ",
      paste(missing_years, collapse = ", "),
      call. = FALSE
    )
  }

  # Compound one year at a time, in the same order as the original loop;
  # unname() keeps the year label from leaking into the result.
  for (rate in unname(increases[years])) {
    base_earnings <- base_earnings * (1 + rate / 100)
  }
  round(base_earnings, 2)
}

# Add estimated median earnings for every year from 2018 to 2024, each one
# projected from the 2017 median. Rows are processed one at a time and the
# row-wise grouping is removed again afterwards.
extended_creatives <- ungroup(
  mutate(
    rowwise(creatives),
    Median_2018 = calculate_earnings(2017, Median_2017, 2018),
    Median_2019 = calculate_earnings(2017, Median_2017, 2019),
    Median_2020 = calculate_earnings(2017, Median_2017, 2020),
    Median_2021 = calculate_earnings(2017, Median_2017, 2021),
    Median_2022 = calculate_earnings(2017, Median_2017, 2022),
    Median_2023 = calculate_earnings(2017, Median_2017, 2023),
    Median_2024 = calculate_earnings(2017, Median_2017, 2024)
  )
)

0. Basic Plotly

0.1 Simple Point graph with ggplotly

Code
# Minimal example data: x, y and f all hold the values 1 to 4
df <- data.frame(x = c(1, 2, 3, 4))
df$y <- df$x
df$f <- df$x

# Build a simple point plot from `df`.
# Aesthetic mappings:
#   x = x      maps the x variable from df to the x-axis.
#   y = y      maps the y variable from df to the y-axis.
#   frame = f  is not recognized by ggplot2 but is understood by ggplotly().
#              In Plotly, frame creates an animation, where each unique
#              value of f represents a separate frame in the animation.
p <- ggplot(data = df, mapping = aes(x = x, y = y, frame = f)) +
  geom_point()

# Convert to Plotly; the frame mapping turns it into an animation
ggplotly(p)

1. Creatives Salaries

1.1 Create a Scatterplot Using ggplotly

Code
# Scatter plot of the 2017 median wage against the 2016 -> 2017 change,
# with a dashed reference line at 0% improvement.
scatterPlot <- ggplot(
  creatives,
  aes(x = Median_2017, y = Percent_Improvement)
) +
  geom_point(alpha = 0.7, colour = "red") +
  labs(
    x = "Median Occupation Hourly Wage in 2017",
    y = "% Improvement Year over Year (2016 to 2017)",
    title = "Creative Occupations Median Hourly Wage"
  ) +
  geom_hline(
    yintercept = 0,
    linetype = "dashed",
    color = "darkblue",
    linewidth = 0.5
  ) +
  theme_classic()

# Turn the static ggplot into an interactive plotly widget
ggplotly(scatterPlot)

1.2 Add labels and tooltips

Code
# The mouse-over details are supplied through ggplot's `text` aesthetic and
# then selected with the `tooltip` argument of ggplotly().

# Build the tooltip text: one string per row, stored in a new column, showing
# the occupation, the 2016 and 2017 median salaries and the change.
creatives_data <- mutate(
  creatives,
  tooltip_text = paste0(
    "Occupation: ", Occupation, "\n",
    "2017: ", Median_2017, "\n",
    "2016: ", Median_2016, "\n",
    "% Improvement Year over Year: ", Percent_Improvement, "\n"
  )
)

# Same scatter plot as before, now carrying the tooltip column as `text`
scatterPlot <- ggplot(
  creatives_data,
  aes(Median_2017, Percent_Improvement, text = tooltip_text)
) +
  geom_point(alpha = 0.7, colour = "red") +
  labs(
    x = "Median Occupation Hourly Wage in 2017",
    y = "% Improvement Year over Year (2016 to 2017)",
    title = "Creative Occupations Median Hourly Wage"
  ) +
  geom_hline(
    yintercept = 0,
    linetype = "dashed",
    color = "darkblue",
    linewidth = 0.5
  ) +
  theme_classic()

# Convert to an interactive plot.
# tooltip = "text" restricts the hover box to the aesthetic that was mapped
# to `text` inside aes().
ggplotly(scatterPlot, tooltip = "text")

1.3 Create Multi-Line Time Chart

Code
# Reshape the extended data to long format for the line graph: one row per
# occupation and year. All columns starting with "Median_" are gathered, the
# prefix is stripped so that only the year remains, and a tooltip string is
# added for each row.
creatives_long <- extended_creatives %>%
  pivot_longer(
    cols = starts_with("Median_"),
    names_to = "Year",
    values_to = "Median_Wage"
  ) %>%
  mutate(
    Year = gsub("Median_", "", Year),
    tooltip_text2 = paste0(
      "Occupation: ", Occupation, "\n",
      "Year: ", Year, "\n",
      "Median Wage: ", Median_Wage, "\n"
    )
  )

# One line (plus points) per occupation
line_chart <- ggplot(
  creatives_long,
  aes(
    x = Year,
    y = Median_Wage,
    group = Occupation,
    color = Occupation,
    text = tooltip_text2
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Median Hourly Wage Trends ",
    x = "Year",
    y = "Median Hourly Wage",
    color = "Occupation"
  ) +
  theme_minimal() +
  # The legend is hidden for a cleaner look
  theme(legend.position = "none")

# Interactive version showing the tooltip text on hover
ggplotly(line_chart, tooltip = "text")
Code
# Work with a random sample of 10 occupations; the fixed seed makes the
# selection reproducible.
set.seed(123)
random_occupations <- sample(unique(extended_creatives$Occupation), 10)

# Keep only the rows belonging to the sampled occupations
filtered_creatives <- filter(
  extended_creatives,
  Occupation %in% random_occupations
)

# Long format for the line charts, with the year extracted as a number
creatives_long <- filtered_creatives %>%
  pivot_longer(
    cols = starts_with("Median_"),
    names_to = "Year",
    values_to = "Median_Wage"
  ) %>%
  mutate(Year = as.numeric(gsub("Median_", "", Year)))

1.4 Multi-Line Time Chart For 10 Occupations

Code
# Line chart for the sampled occupations.
# The tooltip is built with paste0(): paste() with its default separator
# inserted an extra space after every piece ("Occupation:  X", "$ 12.3").
line_chart <- ggplot(
  data = creatives_long,
  aes(
    x = Year,
    y = Median_Wage,
    group = Occupation,
    color = Occupation,
    text = paste0(
      "Occupation: ", Occupation,
      "<br>Year: ", Year,
      "<br>Median Wage: $", Median_Wage
    )
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Median Hourly Wage Trends (2016-2024)",
    x = "Year",
    y = "Median Hourly Wage",
    color = "Occupation"
  ) +
  theme_minimal()

# Convert to interactive Plotly chart, showing only the custom tooltip
ggplotly(line_chart, tooltip = "text")

1.5 Multi-Line Time Chart For 10 Occupations with Animation

Code
# Animated chart for the sampled occupations: `frame = Year` makes every
# year its own animation frame.
# NOTE(review): each frame then holds a single point per occupation, so
# geom_line() presumably has nothing to connect and only the points are
# drawn -- confirm in the rendered output.
# The tooltip is built with paste0(): paste() with its default separator
# inserted an extra space after every piece ("Occupation:  X", "$ 12.3").
line_chart <- ggplot(
  data = creatives_long,
  aes(
    x = Year,
    y = Median_Wage,
    frame = Year,
    group = Occupation,
    color = Occupation,
    text = paste0(
      "Occupation: ", Occupation,
      "<br>Year: ", Year,
      "<br>Median Wage: $", Median_Wage
    )
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Median Hourly Wage Trends (2016-2024)",
    x = "Year",
    y = "Median Hourly Wage",
    color = "Occupation"
  ) +
  theme_minimal()

# Convert to interactive Plotly chart, showing only the custom tooltip
ggplotly(line_chart, tooltip = "text")

1.7 Multi-Line Time Chart With Slider for Year

Code
# Line chart for the sampled occupations, later given a year range slider.
# Fixes in the tooltip: the stray `... =` argument name in front of
# Median_Wage is removed, and paste0() replaces paste() so that no extra
# spaces are inserted ("Occupation:  X", "$ 12.3").
line_chart <- ggplot(
  data = creatives_long,
  aes(
    x = as.numeric(Year),  # The slider axis below is linear, so keep Year numeric
    y = Median_Wage,
    group = Occupation,
    color = Occupation,
    text = paste0(
      "Occupation: ", Occupation,
      "<br>Year: ", Year,
      "<br>Median Wage: $", Median_Wage
    )
  )
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Median Hourly Wage Trends",
    x = "Year",
    y = "Median Hourly Wage",
    color = "Occupation"
  ) +
  theme_minimal() +
  theme(legend.position = "right")  # Keep legend for occupation selection

# Convert to an interactive plot
interactive_chart <- ggplotly(line_chart, tooltip = "text")

# Add a year range slider; the titles given here are applied to the
# plotly layout after the ggplot labels were converted.
interactive_chart <- interactive_chart %>%
  layout(
    title = "Median Hourly Wage Trends Over Time",
    xaxis = list(
      title = "Slide to Adjust Year Range",
      rangeslider = list(visible = TRUE),  # Enable range slider
      type = "linear"
    ),
    yaxis = list(title = "Median Hourly Wage")
  )

# Display the interactive chart
interactive_chart

1.8 Create a Faceted Line Chart Showing Time Line per Occupation

Code
# Small multiples: one wrapped panel per occupation, each with its own
# y-axis range.
p <- ggplot(
  creatives_long,
  aes(
    x = Year,
    y = Median_Wage,
    group = Occupation,
    color = Occupation
  )
) +
  geom_line(linewidth = 1) +
  facet_wrap(~ Occupation, scales = "free_y") +
  labs(
    title = "Median Hourly Wage Trends (2016-2024) by Occupation (using Wrap)",
    x = "Year",
    y = "Median Hourly Wage"
  ) +
  theme_minimal()

# Render interactively at a fixed size
ggplotly(p, width = 2000, height = 900)
Code
# Grid layout with the occupations laid out as facet columns
p <- ggplot(
  creatives_long,
  aes(
    x = Year,
    y = Median_Wage,
    group = Occupation,
    color = Occupation
  )
) +
  geom_line(linewidth = 1) +
  facet_grid(~ Occupation, scales = "free_y") +
  labs(
    title = "Median Hourly Wage Trends (2016-2024) by Occupation (As a Grid)",
    x = "Year",
    y = "Median Hourly Wage"
  ) +
  theme_minimal()

# Render interactively at a fixed size
ggplotly(p, width = 1500, height = 900)
Code
# Grid layout with every occupation on its own row; each row gets a free
# y-axis and the strip labels are switched to the other side of the panels.
p <- ggplot(
  creatives_long,
  aes(
    x = Year,
    y = Median_Wage,
    group = Occupation,
    color = Occupation
  )
) +
  geom_line(linewidth = 1) +
  facet_grid(Occupation ~ ., scales = "free_y", switch = "y") +
  labs(
    title = "Median Hourly Wage Trends (2016-2024) by Occupation (As a Grid)",
    x = "Year",
    y = "Median Hourly Wage"
  ) +
  theme_minimal()

# Render interactively at a fixed size
ggplotly(p, width = 1500, height = 900)

2. Using Gapminder data

2.1 Bubble Plot

Code
# Bubble plot of life expectancy against GDP per capita (log10 x-axis):
# bubble size is population, colour is continent, and `frame = year`
# gives one animation frame per year with countries identified via `ids`.
p <- ggplot(
  gapminder,
  aes(
    x = gdpPercap,
    y = lifeExp,
    ids = country,
    color = continent,
    frame = year
  )
) +
  geom_point(aes(size = pop)) +
  scale_x_log10()

ggplotly(p)

2.2 Adjusting Scale

Code
# Same bubble plot, with explicit breaks on the log10 x-axis and
# comma-formatted tick labels.
p <- ggplot(
  gapminder,
  aes(
    x = gdpPercap,
    y = lifeExp,
    ids = country,
    color = continent,
    frame = year
  )
) +
  geom_point(aes(size = pop)) +
  scale_x_log10(
    breaks = c(500, 1000, 5000, 10000, 50000),
    labels = scales::comma
  )

ggplotly(p)

2.3 Adding Animations

Code
# Bubble plot with the animation aesthetics (frame, ids) set on the point
# layer; ggplot2 warns that it ignores them, ggplotly() picks them up.
p <- ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point(aes(size = pop, frame = year, ids = country)) +
  scale_x_log10()
Warning in geom_point(aes(size = pop, frame = year, ids = country)): Ignoring
unknown aesthetics: frame and ids
Code
# Convert to plotly and configure the animation in one pipeline
animp <- ggplotly(p) %>%
  animation_opts(
    frame = 1000,        # Duration of each frame (in milliseconds)
    easing = "elastic",  # Type of animation easing (transition effect)
    redraw = FALSE       # Whether to fully redraw the plot at each frame
  )

animp

2.4 Add Buttons

Code
# Base bubble plot for the play-button example; frame and ids are mapped
# on the point layer for ggplotly() to use.
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point(mapping = aes(size = pop, frame = year, ids = country)) +
  scale_x_log10()
Warning in geom_point(aes(size = pop, frame = year, ids = country)): Ignoring
unknown aesthetics: frame and ids
Code
# Convert to plotly, set the animation options, then place the play button
# at the bottom-right corner of the plot.
animp <- ggplotly(p) %>%
  animation_opts(frame = 1000, easing = "elastic", redraw = FALSE) %>%
  animation_button(
    x = 1,
    xanchor = "right",
    y = 0,
    yanchor = "bottom"
  )

animp

2.5 Add Slider

Code
# Base bubble plot for the slider example; frame and ids are mapped on the
# point layer for ggplotly() to use.
p <- ggplot(
  gapminder,
  mapping = aes(gdpPercap, lifeExp, color = continent)
) +
  geom_point(
    aes(size = pop, frame = year, ids = country)
  ) +
  scale_x_log10()
Warning in geom_point(aes(size = pop, frame = year, ids = country)): Ignoring
unknown aesthetics: frame and ids
Code
# Convert this chunk's plot to plotly. The original assigned the result to
# the misspelled name `ainmp`, so every step below silently reused the
# `animp` object left over from the previous chunk.
animp <- ggplotly(p)

# Animation timing and easing
animp <- animp %>%
  animation_opts(
    1000, easing = "elastic", redraw = FALSE
  )

# Position the play button at the bottom-right corner
animp <- animp %>%
  animation_button(
    x = 1, xanchor = "right", y = 0, yanchor = "bottom"
  )

# Add a slider whose current-value label reads "YEAR <value>" in red
animp <- animp %>%
  animation_slider(
    currentvalue = list(prefix = "YEAR ", font = list(color = "red"))
  )

animp